This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
Population density 2015 data source:
https://www.volksgezondheidenzorg.info/onderwerp/bevolking/regionaal-internationaal/bevolkingsomvang
# Package installs
# ("TDAmapper")
# install.packages("ggplot2")
# install.packages("devtools")
# devtools::install_github("paultpearson/TDAmapper")
# devtools::install_github("christophergandrud/networkD3")
# install.packages("factoextra")
# install.packages("cluster")
# install.packages("magrittr")
# install.packages("fpc")
# install.packages("dbscan")
# Import packages
library(devtools)
library(TDAmapper)
library(ggplot2)
library(igraph)
library(networkD3)
library("cluster")
library("factoextra")
library("magrittr")
library("fpc")
library("dbscan")
Get coordinate, infection data from NL municipalities. We read the infections as a list of dataframes, the coordinates are a separate dataframe.
# Check working directory and read csv
wd = getwd()
coords_directory <- ("./data/COVIDNL_COORDS.csv")
popdensity_directory <- ("./data/bevdh2015.csv")
coords = read.csv(coords_directory, header = TRUE, sep=",", stringsAsFactors = FALSE)
popdensity = read.csv(popdensity_directory, header = TRUE, sep=";", stringsAsFactors = FALSE, colClasses=c("Inwoners.2015"="character","Inwoners.per.km2"="character"))
# Drop useless columns
popdensity = subset(popdensity, select = -c(ID, Indicator))
coords = subset(coords, select = -c(X))
# Remove the . values in the file as they screw up numerical values
popdensity$Inwoners.2015 <- as.numeric(sub('\\.', '', popdensity$Inwoners.2015))
popdensity$Inwoners.per.km2 <- as.numeric(sub('\\.', '', popdensity$Inwoners.per.km2))
# Set wd to data folder so we can read all the csv's as a list
tempwd = paste(wd, "data", "COVIDNL", sep="/")
tempwd
[1] "J:/Documents/2IMG10/project/2img10-group-6/data/COVIDNL"
setwd(tempwd)
The working directory was changed to J:/Documents/2IMG10/project/2img10-group-6/data/COVIDNL inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
# Read csv's as list
temp = list.files(pattern="*.csv")
temp
[1] "COVID0104.csv" "COVID0204.csv" "COVID0303.csv" "COVID0304.csv" "COVID0403.csv" "COVID0404.csv" "COVID0503.csv" "COVID0504.csv" "COVID0603.csv"
[10] "COVID0604.csv" "COVID0703.csv" "COVID0704.csv" "COVID0803.csv" "COVID0804.csv" "COVID0903.csv" "COVID0904.csv" "COVID1003.csv" "COVID1004.csv"
[19] "COVID1103.csv" "COVID1104.csv" "COVID1203.csv" "COVID1204.csv" "COVID1303.csv" "COVID1304.csv" "COVID1403.csv" "COVID1404.csv" "COVID1503.csv"
[28] "COVID1504.csv" "COVID1603.csv" "COVID1604.csv" "COVID1703.csv" "COVID1704.csv" "COVID1803.csv" "COVID1804.csv" "COVID1903.csv" "COVID1904.csv"
[37] "COVID2003.csv" "COVID2004.csv" "COVID2103.csv" "COVID2104.csv" "COVID2203.csv" "COVID2303.csv" "COVID2403.csv" "COVID2503.csv" "COVID2603.csv"
[46] "COVID2703.csv" "COVID2803.csv" "COVID2903.csv" "COVID3003.csv" "COVID3103.csv"
infections = lapply(temp, read.delim, header = TRUE, sep = ";", stringsAsFactors = FALSE)
# Set wd back
setwd(wd)
# Show read data, the coordinates contain some column 'x' but we just ignore this...
coords
popdensity
infections
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
[[11]]
[[12]]
[[13]]
[[14]]
[[15]]
[[16]]
[[17]]
[[18]]
[[19]]
[[20]]
[[21]]
[[22]]
[[23]]
[[24]]
[[25]]
[[26]]
[[27]]
[[28]]
[[29]]
[[30]]
[[31]]
[[32]]
[[33]]
[[34]]
[[35]]
[[36]]
[[37]]
[[38]]
[[39]]
[[40]]
[[41]]
[[42]]
[[43]]
[[44]]
[[45]]
[[46]]
[[47]]
[[48]]
[[49]]
[[50]]
length(infections)
[1] 50
Do some merging so that we have both the coordinate and infection data in each dataframe.
# Merge data on municipality
merged = list();
coords_popdensity <- merge(x=coords, y=popdensity, by.x=c('Municipality'), by.y=c('Gemeente'), sort = TRUE)
# Not the most efficient but it does the job,
# we see that not all files have the same format so we check for the 2 most common ones
i = 1
for (inf in infections) {
d = substr(temp[[i]], 6,9)
print(d)
inf$Date <- d
inf[is.na(inf)] <- 0
if("Gemeente" %in% colnames(inf)) {
new = merge(x=coords_popdensity, y=inf, by.x=c('Municipality'), by.y=c('Gemeente'), sort = TRUE)
merged <- c(merged, list(new))
} else if("Category" %in% colnames(inf)) {
new = merge(x=coords_popdensity, y=inf, by.x=c('Municipality'), by.y=c('Category'), sort = TRUE)
merged <- c(merged, list(new))
} # Disregard others
i = i + 1
}
[1] "0104"
[1] "0204"
[1] "0303"
[1] "0304"
[1] "0403"
[1] "0404"
[1] "0503"
[1] "0504"
[1] "0603"
[1] "0604"
[1] "0703"
[1] "0704"
[1] "0803"
[1] "0804"
[1] "0903"
[1] "0904"
[1] "1003"
[1] "1004"
[1] "1103"
[1] "1104"
[1] "1203"
[1] "1204"
[1] "1303"
[1] "1304"
[1] "1403"
[1] "1404"
[1] "1503"
[1] "1504"
[1] "1603"
[1] "1604"
[1] "1703"
[1] "1704"
[1] "1803"
[1] "1804"
[1] "1903"
[1] "1904"
[1] "2003"
[1] "2004"
[1] "2103"
[1] "2104"
[1] "2203"
[1] "2303"
[1] "2403"
[1] "2503"
[1] "2603"
[1] "2703"
[1] "2803"
[1] "2903"
[1] "3003"
[1] "3103"
merged
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
[[11]]
[[12]]
[[13]]
[[14]]
[[15]]
[[16]]
[[17]]
[[18]]
[[19]]
[[20]]
[[21]]
Compute interesting features from the data.
Implement a basic mapper from TDAmapper library. See also the tutorial from: http://bertrand.michel.perso.math.cnrs.fr/Enseignements/TDA/Mapper.html
# Creates a mapper object using the TDAmapper library
create_tda_mapper <- function(filter_values, x_coord, y_coord, intervals=25, num_bins=10, overlap=30) {
basic.coords = data.frame(x_coord, y_coord)
# Compute distance matrix
basic.dist = dist(basic.coords)
length(filter_values)
if(length(filter_values) == 2) {
twodim_intervals = c(intervals, intervals)
return (
mapper2D(
dist_object = basic.dist,
filter_values = filter_values, # Expects a list of two vectors
num_intervals = twodim_intervals,
percent_overlap = overlap,
num_bins_when_clustering = num_bins
)
)
}
return (
mapper(
dist_object = basic.dist,
filter_values = filter_values,
num_intervals = intervals,
percent_overlap = overlap,
num_bins_when_clustering = num_bins
)
)
}
# Creates a basic plot of the given mapper object (TDAmapper class)
# Should also work if you give a custom object with adjacency matrix of nodes (Not tested)
plot_mapper <- function (mapper) {
# Plot graph
graph <- graph.adjacency(mapper$adjacency, mode="undirected")
plot(graph, layout = layout.auto(graph) )
return(graph)
}
# Grab some data
first = merged[[3]] # 03/03
second = merged[[32]] # 20/03
third = merged[[1]] # 01/04
first
second
third
firstMapper = create_tda_mapper(first$Aantal, first$Latitude, first$Longitude)
plot_mapper(firstMapper)
IGRAPH 8325288 U--- 6 0 --
+ edges from 8325288:
secondMapper = create_tda_mapper(second$Aantal, second$Latitude, second$Longitude)
plot_mapper(secondMapper)
IGRAPH 835982b U--- 24 10 --
+ edges from 835982b:
[1] 1-- 2 2-- 3 3-- 5 4-- 6 8--10 9--12 11--13 12--14 17--18 22--23
thirdMapper = create_tda_mapper(third$Aantal, third$Latitude, third$Longitude)
plot_mapper(thirdMapper)
IGRAPH 839332a U--- 24 14 --
+ edges from 839332a:
[1] 1-- 2 2-- 3 3-- 4 3-- 5 4-- 7 4-- 8 5-- 7 6-- 9 11--14 15--16 15--17 18--19 19--22 20--21
A more advanced example using the networkD3 library which yields way nicer visualizations
# To be sure that we have this imported
# library(networkD3)
# Create a mapper object
mapper = create_tda_mapper(third$Aantal, third$Latitude, third$Longitude, 20, 10, 30)
plotForceNetwork <- function(mapper, labels) {
MapperNodes <- mapperVertices(mapper, labels)
MapperLinks <- mapperEdges(mapper)
forceNetwork(
Nodes = MapperNodes, Links = MapperLinks,
Source = "Linksource", Target = "Linktarget",
Value = "Linkvalue", NodeID = "Nodename",
Group = "Nodegroup", opacity = 1, opacityNoHover = 1,
linkDistance = 25, charge = -10,
Nodesize = "Nodesize", legend = T
)
}
plotForceNetwork(mapper, first$Aantal)
NA
Other mapper solutions:
More advanced force network which computes some other statistics on the nodes
mapper = create_tda_mapper(second$Aantal, second$Longitude, second$Latitude, 20, 10, 30)
MapperNodes <- mapperVertices(mapper, second$Aantal )
MapperLinks <- mapperEdges(mapper)
municipalities <- MapperNodes$Nodename
sums <- c()
# Map countries to label names
for (i in (1: nrow(MapperNodes))){
sum = 0
temp <- paste("V",i, ":", sep= "")
split <- strsplit(MapperNodes$Nodename[i], " ")
for (j in (2 : (length(split[[1]])) ) ) {
num_other = as.integer(gsub(",","",split[[1]][[j]]))
index = match(num_other, second$Aantal)
prov <- second$Municipality[[index]]
sum <- sum + num_other
temp <- paste(temp, prov)
}
sums[i] <- sum
municipalities[i] <- paste(temp, sep = ", ")
}
for (i in (1: nrow(MapperNodes))){
MapperNodes$Nodegroup[i] <- i
}
MapperNodes$Nodesize <- sums*0.02
MapperNodes$Nodename <- municipalities
MapperNodes$NodeCases <- sums
municipalities
[1] "V1: Aalsmeer Beesel Beuningen Beemster Barneveld Barneveld Alphen-Chaam Barneveld Beuningen Brummen Dalfsen Beemster Aa en Hunze Borsele Achtkarspelen Albrandswaard Almelo Aa en Hunze Borsele Achtkarspelen Aa en Hunze Alblasserdam Barneveld Almelo Borne Alphen-Chaam Alblasserdam Barneveld Alblasserdam Brummen Alblasserdam Alblasserdam Aa en Hunze Alblasserdam Albrandswaard Brummen Brummen Borsele Alphen-Chaam Aa en Hunze Brummen Dalfsen Alblasserdam Almelo Brummen Aa en Hunze Brummen Aa en Hunze Aa en Hunze Beemster Alphen-Chaam Alphen-Chaam Borne Brummen Almelo Beemster Barneveld Beuningen Alblasserdam Alphen-Chaam Brummen Albrandswaard Albrandswaard Almelo Aa en Hunze Beemster Brummen Aa en Hunze Alblasserdam Borsele Aa en Hunze Aa en Hunze Aalsmeer Brummen Alblasserdam Beemster Aalsmeer Albrandswaard Brummen Achtkarspelen Borsele Beuningen Borsele Barneveld Aa en Hunze Alphen-Chaam Aa en Hunze Bergen op Zoom Barneveld Brummen Beemster Aalsmeer Alblasserdam Alphen-Chaam Druten Borne Beesel Beuningen Alphen-Chaam Achtkarspelen Alphen-Chaam Borsele Albrandswaard Aalsmeer Albrandswaard Albrandswaard Achtkarspelen Albrandswaard Alphen-Chaam Aalsmeer Brummen Aalsmeer Beesel Borsele Alblasserdam Barneveld Druten Brummen Beemster Dalfsen Brummen Beemster Borne Dalfsen Albrandswaard Borsele Albrandswaard Barneveld Brummen Aa en Hunze Beesel Barneveld Borsele Alblasserdam Beemster Beesel Albrandswaard Aalsmeer Dalfsen Albrandswaard Alblasserdam Alblasserdam Aalsmeer Alblasserdam Albrandswaard Aalsmeer Borne Alblasserdam Borsele Beesel Alblasserdam Aa en Hunze Aa en Hunze Alblasserdam Bergen op Zoom Dalfsen Aa en Hunze Alphen-Chaam Alphen-Chaam Beuningen Brummen Aalsmeer Almelo Almelo Albrandswaard Beuningen Aalsmeer Borne Achtkarspelen Beesel Brummen Bergen op Zoom Alphen-Chaam Borsele Aalsmeer Druten Brummen Dalfsen Alphen-Chaam Beesel Almelo Beesel Bergen op Zoom Alphen-Chaam Beuningen Almelo Brummen Beesel Brummen Borsele Beuningen Borne Beesel Albrandswaard Aa en Hunze Beesel Barneveld Beemster Aalsmeer Borne Albrandswaard Aa en Hunze Dalfsen Beesel Barneveld Barneveld Druten Borsele Aa en Hunze Dalfsen Druten Albrandswaard Borsele Bergen op Zoom Albrandswaard Dalfsen Beuningen Alblasserdam Barneveld Bergen op Zoom Alblasserdam Achtkarspelen Achtkarspelen Achtkarspelen Beemster Beesel"
[2] "V2: Apeldoorn Almelo Beuningen Heerde Beuningen Bergen op Zoom Neder-Betuwe Culemborg Culemborg Beuningen Almelo Geldrop-Mierlo Almelo Brunssum Heerhugowaard Culemborg Druten Boekel Geldrop-Mierlo Alkmaar Heerde Brunssum Barendrecht Brunssum Barendrecht Apeldoorn Etten-Leur Brunssum Brunssum Culemborg Beuningen Brunssum Etten-Leur Bergen op Zoom Druten Brunssum Apeldoorn Druten Neder-Betuwe Deventer Barendrecht Brunssum Boekel Beuningen Geldrop-Mierlo Brunssum Beuningen Beuningen Bergen op Zoom Deventer Bergen op Zoom Almelo Boekel Barendrecht Culemborg Beuningen Geldrop-Mierlo Neder-Betuwe Apeldoorn Apeldoorn Heerde Barendrecht Druten Apeldoorn Bergen op Zoom Geldrop-Mierlo Boekel Deventer Beuningen Almelo Brunssum Almelo Heerhugowaard Apeldoorn Almelo Geldrop-Mierlo Deventer Heerde Almelo Beuningen Lelystad Almelo Bergen op Zoom Geldrop-Mierlo Druten Alkmaar Heerhugowaard Hollands Kroon Geldrop-Mierlo"
[3] "V3: Enschede Enschede Etten-Leur Alkmaar Etten-Leur Purmerend Amstelveen Dordrecht Alkmaar Boekel Alphen aan den Rijn Haarlem Haarlemmermeer Haarlemmermeer Amstelveen Amersfoort Boekel Lelystad Amstelveen Boekel Amersfoort Amersfoort Arnhem Overbetuwe Enschede Boekel Deurne Amstelveen Haarlemmermeer Amstelveen Cranendonck Amstelveen"
[4] "V4: Purmerend Zaanstad Overbetuwe Cranendonck Helmond"
[5] "V5: Deurne Gemert-Bakel"
[6] "V6: Almere"
[7] "V7: Heerlen Gemert-Bakel Sittard-Geleen"
[8] "V8: Bernheze"
[9] "V9: 's-Hertogenbosch"
[10] "V10: Eindhoven Maastricht 's-Hertogenbosch 's-Hertogenbosch Heerlen Maastricht"
[11] "V11: Maastricht"
[12] "V12: Maastricht Oss"
[13] "V13: Breda"
[14] "V14: Peel en Maas"
[15] "V15: Tilburg"
[16] "V16: Rotterdam"
[17] "V17: Rotterdam"
[18] "V18: Amsterdam"
MapperNodes
forceNetwork(Nodes = MapperNodes, Links = MapperLinks,
Source = "Linksource", Target = "Linktarget",
Value = "Linkvalue", NodeID = "Nodegroup",
Group = "Nodename", opacity = 1, opacityNoHover = 1,
linkDistance = 75, charge = -10,
Nodesize = "Nodesize")
Trying out custom clustering + filter values: Also see: https://www.datanovia.com/en/blog/types-of-clustering-methods-overview-and-quick-start-r-code/
first.subset <- subset(first, select = c(Latitude, Longitude))
second.subset <- subset(second, select = c(Aantal, Inwoners.per.km2))
third.subset <- subset(third, select = c(Aantal, Aantal.per.100.000.inwoners))
# I assume here that the ordering is correct
row.names(first.subset) <- first$Municipality
row.names(second.subset) <- second$Municipality
row.names(third.subset) <- third$Municipality
print(first.subset)
print(second.subset)
print(third.subset)
# Computes the 'optimal' number of kmeans clusters for our data
fviz_nbclust(first.subset, kmeans, method = "gap_stat")
Error in fviz_nbclust(first.subset, kmeans, method = "gap_stat") :
could not find function "fviz_nbclust"
Define a custom filter function
# print(kmeans.first)
prov_first = setNames(aggregate(first$Aantal, by=list(Province=first$Province), FUN=sum), c('Province', 'Aantal'))
prov_lat = setNames(aggregate(first$Latitude, by=list(Province=first$Province), FUN=mean), c('Province', 'Latitude'))
prov_long = setNames(aggregate(first$Longitude, by=list(Province=first$Province), FUN=mean), c('Province', 'Longitude'))
prov_first = merge(prov_first, prov_lat, by.x="Province", by.y="Province")
prov_first = merge(prov_first, prov_long, by.x="Province", by.y="Province")
prov_first
prov_second = setNames(aggregate(second$Aantal, by=list(Province=second$Province), FUN=sum), c('Province', 'Aantal'))
prov_lat = setNames(aggregate(second$Latitude, by=list(Province=second$Province), FUN=mean), c('Province', 'Latitude'))
prov_long = setNames(aggregate(second$Longitude, by=list(Province=second$Province), FUN=mean), c('Province', 'Longitude'))
prov_second = merge(prov_second, prov_lat, by.x="Province", by.y="Province")
prov_second = merge(prov_second, prov_long, by.x="Province", by.y="Province")
prov_second
prov_third = setNames(aggregate(third$Aantal, by=list(Province=third$Province), FUN=sum), c('Province', 'Aantal'))
prov_lat = setNames(aggregate(third$Latitude, by=list(Province=third$Province), FUN=mean), c('Province', 'Latitude'))
prov_long = setNames(aggregate(third$Longitude, by=list(Province=third$Province), FUN=mean), c('Province', 'Longitude'))
prov_third = merge(prov_third, prov_lat, by.x="Province", by.y="Province")
prov_third = merge(prov_third, prov_long, by.x="Province", by.y="Province")
prov_third
mapper = create_tda_mapper(prov_first$Aantal, prov_first$Longitude, prov_first$Latitude, 20, 10, 30)
MapperNodes <- mapperVertices(mapper, prov_first$Aantal)
MapperLinks <- mapperEdges(mapper)
province <- MapperNodes$Nodename
sums <- 0
# Map countries to label names
for (i in (1: nrow(MapperNodes))){
print(i)
sum = 0
temp <- paste("V",i, ":", sep= "")
split <- strsplit(MapperNodes$Nodename[i], " ")
for (j in (2 : (length(split[[1]])) ) ) {
num_other = as.integer(gsub(",","",split[[1]][[j]]))
index = match(num_other, prov_first$Aantal)
print(index)
prov <- prov_first$Province[[index]]
sum <- sum + num_other
temp <- paste(temp, prov)
}
province[i] <- paste(temp, sep = ", ")
sums[i] <- sum
print(province[i])
}
[1] 1
[1] 11
[1] 1
[1] "V1: Zeeland Drenthe"
[1] 2
[1] 3
[1] "V2: Friesland"
[1] 3
[1] 5
[1] "V3: Groningen"
[1] 4
[1] 2
[1] "V4: Flevoland"
[1] 5
[1] 11
[1] "V5: Zeeland"
[1] 6
[1] 9
[1] "V6: Overijssel"
[1] 7
[1] 10
[1] "V7: Utrecht"
[1] 8
[1] 9
[1] "V8: Overijssel"
[1] 9
[1] 6
[1] "V9: Limburg"
[1] 10
[1] 4
[1] "V10: Gelderland"
[1] 11
[1] 6
[1] "V11: Limburg"
[1] 12
[1] 12
[1] "V12: Zuid-Holland"
[1] 13
[1] 8
[1] "V13: Noord-Holland"
[1] 14
[1] 7
[1] "V14: Noord-Brabant"
MapperNodes$Nodesize <- sums *0.02
for (i in (1: nrow(MapperNodes))){
MapperNodes$Nodegroup[i] <- i
}
means
NULL
MapperNodes$Nodename <- province
MapperNodes$NodeCases <- sums
MapperNodes
forceNetwork(Nodes = MapperNodes, Links = MapperLinks,
Source = "Linksource", Target = "Linktarget",
Value = "Linkvalue", NodeID = "Nodegroup",
Group = "Nodename", opacity = 1, opacityNoHover = 1,
linkDistance = 50, charge = -10,
Nodesize = "Nodesize")
NA
GERMANY
source("bootstrap.R")
Loading required package: tidyverse
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v tibble 3.0.1 v dplyr 1.0.0
v tidyr 1.1.0 v stringr 1.4.0
v readr 1.3.1 v forcats 0.5.0
v purrr 0.3.4
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
x purrr::compose() masks igraph::compose()
x tidyr::crossing() masks igraph::crossing()
x tidyr::extract() masks magrittr::extract()
x dplyr::filter() masks stats::filter()
x dplyr::groups() masks igraph::groups()
x dplyr::lag() masks stats::lag()
x purrr::set_names() masks magrittr::set_names()
x purrr::simplify() masks igraph::simplify()
Loading required package: readxl
Loading required package: janitor
Attaching package: 㤼㸱janitor㤼㸲
The following objects are masked from 㤼㸱package:stats㤼㸲:
chisq.test, fisher.test
Loading required package: TDA
Attaching package: 㤼㸱TDA㤼㸲
The following object is masked from 㤼㸱package:cluster㤼㸲:
silhouette
Loading required package: gridGraphics
Loading required package: grid
Loading required package: rgdal
Loading required package: sp
rgdal: version: 1.5-10, (SVN revision 1006)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
Path to GDAL shared files: J:/Program Files (x86)/R-4.0.0/library/rgdal/gdal
GDAL binary built with GEOS: TRUE
Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
Path to PROJ shared files: J:/Program Files (x86)/R-4.0.0/library/rgdal/proj
Linking to sp version:1.4-2
To mute warnings of possible GDAL/OSR exportToProj4() degradation,
use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
Loading required package: maps
Attaching package: 㤼㸱maps㤼㸲
The following object is masked from 㤼㸱package:purrr㤼㸲:
map
The following object is masked from 㤼㸱package:cluster㤼㸲:
votes.repub
Loading required package: fastcluster
Attaching package: 㤼㸱fastcluster㤼㸲
The following object is masked from 㤼㸱package:stats㤼㸲:
hclust
Loading required package: zoo
Attaching package: 㤼㸱zoo㤼㸲
The following objects are masked from 㤼㸱package:base㤼㸲:
as.Date, as.Date.numeric
Loading required package: lubridate
Attaching package: 㤼㸱lubridate㤼㸲
The following objects are masked from 㤼㸱package:igraph㤼㸲:
%--%, union
The following objects are masked from 㤼㸱package:base㤼㸲:
date, intersect, setdiff, union
The `value` argument of ``names<-`()` must have the same length as `x` as of tibble 3.0.0.
`names` must have length 23, not 24.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.The `value` argument of ``names<-`()` must have the same length as `x` as of tibble 3.0.0.
`names` must have length 22, not 23.
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.Joining, by = c("Municipality", "Province")
Joining, by = c("Municipality", "Province")
Joining, by = c("Province", "Date")
Joining, by = c("Province", "Date")
Joining, by = c("Province", "Date", "Daily")
Joining, by = c("Province", "Date", "Total")
Parsed with column specification:
cols(
geoId = col_character(),
Latitude = col_double(),
Longitude = col_double(),
countriesAndTerritories = col_character()
)
Parsed with column specification:
cols(
zip = col_double(),
City = col_character(),
Longitude = col_double(),
Latitude = col_double()
)
Parsed with column specification:
cols(
DATE = col_date(format = ""),
NIS5 = col_double(),
TX_DESCR_NL = col_character(),
TX_DESCR_FR = col_character(),
TX_ADM_DSTR_DESCR_NL = col_character(),
TX_ADM_DSTR_DESCR_FR = col_character(),
TX_PROV_DESCR_NL = col_character(),
TX_PROV_DESCR_FR = col_character(),
TX_RGN_DESCR_NL = col_character(),
TX_RGN_DESCR_FR = col_character(),
CASES = col_character()
)
Joining, by = "City"
Parsed with column specification:
cols(
Provinces = col_character(),
Latitude = col_double(),
Longitude = col_double()
)
Parsed with column specification:
cols(
time_iso8601 = col_datetime(format = ""),
de_sh = col_double(),
de_hh = col_double(),
de_ni = col_double(),
de_hb = col_double(),
de_nw = col_double(),
de_he = col_double(),
de_rp = col_double(),
de_bw = col_double(),
de_by = col_double(),
de_sl = col_double(),
de_bb = col_double(),
de_mv = col_double(),
de_sn = col_double(),
de_st = col_double(),
de_th = col_double(),
de_be = col_double(),
sum_cases = col_double()
)
Joining, by = "Provinces"
wd = getwd()
wd
[1] "J:/Documents/2IMG10/project/2img10-group-6"
germanyLatLong <- read_csv("./data/duitsland-latlong.csv")
Parsed with column specification:
cols(
Provinces = col_character(),
Latitude = col_double(),
Longitude = col_double()
)
germany <- read_csv("./data/duitsland.csv") %>%
rename(
dateRep = `time_iso8601`,
`Schleswig-Holstein` = de_sh ,
`Hamburg` = de_hh ,
`Niedersachsen` = de_ni ,
`Bremen` = de_hb ,
`Nordrhein-Westfalen` = de_nw ,
`Hessen` = de_he ,
`Rheinland-Pfalz` =de_rp ,
`Baden-Wurttemberg` = de_bw ,
`Bayern` = de_by ,
`Saarland` = de_sl ,
`Brandenburg`= de_bb ,
`Mecklenburg-Vorpommern` = de_mv ,
`Sachsen` =de_sn ,
`Sachsen-Anhalt` = de_st ,
`Thuringen` = de_th ,
`Berlin` = de_be ,
SumCases = sum_cases
) %>%
pivot_longer(-c(dateRep, SumCases ), names_to = "Provinces", values_to = "Cases") %>%
select(-c("SumCases")) %>%
left_join(germanyLatLong)
Parsed with column specification:
cols(
time_iso8601 = col_datetime(format = ""),
de_sh = col_double(),
de_hh = col_double(),
de_ni = col_double(),
de_hb = col_double(),
de_nw = col_double(),
de_he = col_double(),
de_rp = col_double(),
de_bw = col_double(),
de_by = col_double(),
de_sl = col_double(),
de_bb = col_double(),
de_mv = col_double(),
de_sn = col_double(),
de_st = col_double(),
de_th = col_double(),
de_be = col_double(),
sum_cases = col_double()
)
Joining, by = "Provinces"
germany_long <- germany
germany <- germany %>%
pivot_wider(names_from = dateRep, values_from = Cases )
rm(germanyLatLong)
germany
inf = germany$`2020-03-20 17:00:00`
mapper = create_tda_mapper(inf, germany$Longitude, germany$Latitude, 20, 10, 30)
MapperNodes <- mapperVertices(mapper, inf)
MapperLinks <- mapperEdges(mapper)
province <- MapperNodes$Nodename
sums <- c()
# Map countries to label names
for (i in (1: nrow(MapperNodes))){
sum = 0
print(i)
temp <- paste("V",i, ":", sep= "")
split <- strsplit(MapperNodes$Nodename[i], " ")
for (j in (2 : (length(split[[1]])) ) ) {
num_other = as.integer(gsub(",","",split[[1]][[j]]))
index = match(num_other, inf)
prov <- germany$Provinces[[index]]
temp <- paste(temp, prov)
sum <- sum + num_other
}
province[i] <- paste(temp, sep = ", ")
print(province[i])
sums[i] <- sum
}
[1] 1
[1] "V1: Saarland Brandenburg Bremen Mecklenburg-Vorpommern Sachsen-Anhalt Thuringen"
[1] 2
[1] "V2: Schleswig-Holstein"
[1] 3
[1] "V3: Sachsen"
[1] 4
[1] "V4: Rheinland-Pfalz"
[1] 5
[1] "V5: Hamburg"
[1] 6
[1] "V6: Sachsen Berlin"
[1] 7
[1] "V7: Hessen"
[1] 8
[1] "V8: Rheinland-Pfalz"
[1] 9
[1] "V9: Niedersachsen"
[1] 10
[1] "V10: Bayern"
[1] 11
[1] "V11: Baden-Wurttemberg"
[1] 12
[1] "V12: Baden-Wurttemberg"
[1] 13
[1] "V13: Nordrhein-Westfalen"
MapperNodes$Nodesize <- sums * 0.01
for (i in (1: nrow(MapperNodes))){
MapperNodes$Nodegroup[i] <- i
}
MapperNodes$Nodename <- province
MapperNodes$NodeCases <- sums
MapperNodes
forceNetwork(Nodes = MapperNodes, Links = MapperLinks,
Source = "Linksource", Target = "Linktarget",
Value = "Linkvalue", NodeID = "Nodegroup",
Group = "Nodegroup", opacity = 1, opacityNoHover = 1,
linkDistance = 75, charge = -10,
Nodesize = "Nodesize")
It looks like Source/Target is not zero-indexed. This is required in JavaScript and so your plot may not render.
BELGIUM
zipp <- read_csv("./data/zipcode-belgium.csv", col_names = c("zip","City" ,"Longitude", "Latitude" ))
Parsed with column specification:
cols(
zip = col_double(),
City = col_character(),
Longitude = col_double(),
Latitude = col_double()
)
df <- read_csv("data/COVID19BE_CASES_MUNI.csv") %>%
select(DATE, `TX_DESCR_NL`, `TX_PROV_DESCR_NL`, `TX_ADM_DSTR_DESCR_NL`,`TX_RGN_DESCR_NL`, `CASES`) %>%
rename( dateRep = DATE,
Province =TX_PROV_DESCR_NL ,
City = TX_DESCR_NL,
Taal = TX_RGN_DESCR_NL,
Cases = CASES,
Municipality = TX_ADM_DSTR_DESCR_NL ) %>%
left_join( zipp)
Parsed with column specification:
cols(
DATE = col_date(format = ""),
NIS5 = col_double(),
TX_DESCR_NL = col_character(),
TX_DESCR_FR = col_character(),
TX_ADM_DSTR_DESCR_NL = col_character(),
TX_ADM_DSTR_DESCR_FR = col_character(),
TX_PROV_DESCR_NL = col_character(),
TX_PROV_DESCR_FR = col_character(),
TX_RGN_DESCR_NL = col_character(),
TX_RGN_DESCR_FR = col_character(),
CASES = col_character()
)
Joining, by = "City"
problems(df)
# wrangling
df$Cases[which(df$Cases == "<5")] = 0
df$Cases <- parse_integer(df$Cases)
# Replace NA's with most recent values
df$Cases <- as.integer(na.locf(df$Cases) )
belgie_long <- df
# set to match the world data set or NL Data Set
df <- df %>%
pivot_wider(names_from = dateRep, values_from = Cases) #%>%
df <- na.locf(df)
belgie <- df
rm(df,zipp)
belgie
# print(kmeans.first)
prov_first = setNames(aggregate(belgie$`2020-03-04`, by=list(Province=belgie$Province), FUN=sum), c('Province', 'Aantal'))
prov_lat = setNames(aggregate(belgie$Latitude, by=list(Province=belgie$Province), FUN=mean), c('Province', 'Latitude'))
prov_long = setNames(aggregate(belgie$Longitude, by=list(Province=belgie$Province), FUN=mean), c('Province', 'Longitude'))
prov_first = merge(prov_first, prov_lat, by.x="Province", by.y="Province")
prov_first = merge(prov_first, prov_long, by.x="Province", by.y="Province")
prov_first
prov_second = setNames(aggregate(belgie$`2020-03-20`, by=list(Province=belgie$Province), FUN=sum), c('Province', 'Aantal'))
prov_lat = setNames(aggregate(belgie$Latitude, by=list(Province=belgie$Province), FUN=mean), c('Province', 'Latitude'))
prov_long = setNames(aggregate(belgie$Longitude, by=list(Province=belgie$Province), FUN=mean), c('Province', 'Longitude'))
prov_second = merge(prov_second, prov_lat, by.x="Province", by.y="Province")
prov_second = merge(prov_second, prov_long, by.x="Province", by.y="Province")
prov_second
prov_third = setNames(aggregate(belgie$`2020-04-02`, by=list(Province=belgie$Province), FUN=sum), c('Province', 'Aantal'))
prov_lat = setNames(aggregate(belgie$Latitude, by=list(Province=belgie$Province), FUN=mean), c('Province', 'Latitude'))
prov_long = setNames(aggregate(belgie$Longitude, by=list(Province=belgie$Province), FUN=mean), c('Province', 'Longitude'))
prov_third = merge(prov_third, prov_lat, by.x="Province", by.y="Province")
prov_third = merge(prov_third, prov_long, by.x="Province", by.y="Province")
prov_third
mapper = create_tda_mapper(prov_first$Aantal, prov_first$Longitude, prov_first$Latitude, 20, 10, 30)
MapperNodes <- mapperVertices(mapper, prov_first$Aantal)
MapperLinks <- mapperEdges(mapper)
MapperNodes
province <- MapperNodes$Nodename
sums <- 0
# Map countries to label names
for (i in (1: nrow(MapperNodes))){
print(i)
sum = 0
temp <- paste("V",i, ":", sep= "")
split <- strsplit(MapperNodes$Nodename[i], " ")
for (j in (2 : (length(split[[1]])) ) ) {
num_other = as.integer(gsub(",","",split[[1]][[j]]))
index = match(num_other, prov_first$Aantal)
prov <- prov_first$Province[[index]]
temp <- paste(temp, prov)
sum <- sum + num_other
}
province[i] <- paste(temp, sep = ", ")
print(province[i])
sums[i] <- sum
}
[1] 1
[1] "V1: Provincie Waals-Brabant"
[1] 2
[1] "V2: Provincie Limburg"
[1] 3
[1] "V3: Provincie Namen"
[1] 4
[1] "V4: Provincie Luxemburg"
[1] 5
[1] "V5: Provincie Luxemburg"
[1] 6
[1] "V6: Provincie Vlaams-Brabant"
[1] 7
[1] "V7: Provincie Oost-Vlaanderen"
[1] 8
[1] "V8: Provincie West-Vlaanderen"
[1] 9
[1] "V9: Provincie Henegouwen"
[1] 10
[1] "V10: Provincie Antwerpen"
[1] 11
[1] "V11: Provincie Luik"
for (i in (1: nrow(MapperNodes))){
MapperNodes$Nodegroup[i] <- i
}
MapperNodes$Nodesize <- sums * 0.02
MapperNodes$NodeCases <- sums
MapperNodes$Nodename <- province
MapperNodes
forceNetwork(Nodes = MapperNodes, Links = MapperLinks,
Source = "Linksource", Target = "Linktarget",
Value = "Linkvalue", NodeID = "Nodegroup",
Group = "Nodename", opacity = 1, opacityNoHover = 1,
linkDistance = 75, charge = -10,
Nodesize = "Nodesize")
It looks like Source/Target is not zero-indexed. This is required in JavaScript and so your plot may not render.
# print(kmeans.first)
prov_first = setNames(aggregate(belgie$`2020-03-04`, by=list(Municipality=belgie$Municipality), FUN=sum), c('Municipality', 'Aantal'))
prov_lat = setNames(aggregate(belgie$Latitude, by=list(Municipality=belgie$Municipality), FUN=mean), c('Municipality', 'Latitude'))
prov_long = setNames(aggregate(belgie$Longitude, by=list(Municipality=belgie$Municipality), FUN=mean), c('Municipality', 'Longitude'))
prov_first = merge(prov_first, prov_lat, by.x="Municipality", by.y="Municipality")
prov_first = merge(prov_first, prov_long, by.x="Municipality", by.y="Municipality")
prov_first
prov_second = setNames(aggregate(belgie$`2020-03-20`, by=list(Municipality=belgie$Municipality), FUN=sum), c('Municipality', 'Aantal'))
prov_lat = setNames(aggregate(belgie$Latitude, by=list(Municipality=belgie$Municipality), FUN=mean), c('Municipality', 'Latitude'))
prov_long = setNames(aggregate(belgie$Longitude, by=list(Municipality=belgie$Municipality), FUN=mean), c('Municipality', 'Longitude'))
prov_second = merge(prov_second, prov_lat, by.x="Municipality", by.y="Municipality")
prov_second = merge(prov_second, prov_long, by.x="Municipality", by.y="Municipality")
prov_second
prov_third = setNames(aggregate(belgie$`2020-04-01`, by=list(Municipality=belgie$Municipality), FUN=sum), c('Municipality', 'Aantal'))
prov_lat = setNames(aggregate(belgie$Latitude, by=list(Municipality=belgie$Municipality), FUN=mean), c('Municipality', 'Latitude'))
prov_long = setNames(aggregate(belgie$Longitude, by=list(Municipality=belgie$Municipality), FUN=mean), c('Municipality', 'Longitude'))
prov_third = merge(prov_third, prov_lat, by.x="Municipality", by.y="Municipality")
prov_third = merge(prov_third, prov_long, by.x="Municipality", by.y="Municipality")
prov_third
mapper = create_tda_mapper(prov_first$Aantal, prov_first$Longitude, prov_first$Latitude, 20, 10, 30)
MapperNodes <- mapperVertices(mapper, prov_first$Aantal)
MapperLinks <- mapperEdges(mapper)
# Compute mean
vertex.mean = c()
mapper$points_in_level_set
[[1]]
[1] 22
[[2]]
NULL
[[3]]
[1] 2 12 15 31 35 41 43
[[4]]
[1] 15 31 33 35 37 43
[[5]]
[1] 1 5 20 34
[[6]]
[1] 3 8 9 10 11 26 32 42
[[7]]
[1] 3 10 14 18 21 25 27 36 38 42
[[8]]
[1] 6 29
[[9]]
[1] 6 7
[[10]]
[1] 7 13 16 28
[[11]]
[1] 19
[[12]]
NULL
[[13]]
[1] 24
[[14]]
NULL
[[15]]
[1] 23
[[16]]
[1] 30
[[17]]
[1] 39
[[18]]
[1] 39 40
[[19]]
NULL
[[20]]
[1] 4 17
for (i in 1:length(mapper$points_in_vertex)){
vertex.mean <- c(vertex.mean, mean(mapper$points_in_vertex[[i]]))
}
municipalities <- MapperNodes$Nodename
# Map countries to label names
for (i in (1: nrow(MapperNodes))){
print(i)
temp <- paste("V",i, ":", sep= "")
split <- strsplit(MapperNodes$Nodename[i], " ")
for (j in (2 : (length(split[[1]])) ) ) {
num_other = as.integer(gsub(",","",split[[1]][[j]]))
index = match(num_other, prov_first$Aantal)
prov <- prov_first$Municipality[[index]]
temp <- paste(temp, prov)
}
municipalities[i] <- paste(temp, sep = ", ")
print(municipalities[i])
}
[1] 1
[1] "V1: Arrondissement La Louvière"
[1] 2
[1] "V2: Arrondissement Aarlen"
[1] 3
[1] "V3: Arrondissement Aarlen Arrondissement Aarlen Arrondissement Eeklo Arrondissement Eeklo Arrondissement Eeklo Arrondissement Eeklo"
[1] 4
[1] "V4: Arrondissement Eeklo Arrondissement Eeklo Arrondissement Eeklo Arrondissement Philippeville Arrondissement Philippeville Arrondissement Eeklo"
[1] 5
[1] "V5: Arrondissement Aalst"
[1] 6
[1] "V6: Arrondissement Aalst"
[1] 7
[1] "V7: Arrondissement Aalst Arrondissement Aalst"
[1] 8
[1] "V8: Arrondissement Brugge Arrondissement Brugge Arrondissement Aat Arrondissement Brugge Arrondissement Brussel-Hoofdstad Arrondissement Aat Arrondissement Brugge Arrondissement Aat"
[1] 9
[1] "V9: Arrondissement Aat Arrondissement Doornik-Moeskroen Arrondissement Doornik-Moeskroen Arrondissement Doornik-Moeskroen Arrondissement Doornik-Moeskroen Arrondissement Aat Arrondissement Doornik-Moeskroen Arrondissement Mechelen Arrondissement Aat"
[1] 10
[1] "V10: Arrondissement Doornik-Moeskroen"
[1] 11
[1] "V11: Arrondissement Bergen"
[1] 12
[1] "V12: Arrondissement Neufchâteau"
[1] 13
[1] "V13: Arrondissement Bergen"
[1] 14
[1] "V14: Arrondissement Borgworm"
[1] 15
[1] "V15: Arrondissement Dinant Arrondissement Borgworm Arrondissement Dinant"
[1] 16
[1] "V16: Arrondissement Dinant"
[1] 17
[1] "V17: Arrondissement Hoei"
[1] 18
[1] "V18: Arrondissement Luik"
[1] 19
[1] "V19: Arrondissement Leuven"
[1] 20
[1] "V20: Arrondissement Nijvel"
[1] 21
[1] "V21: Arrondissement Turnhout"
[1] 22
[1] "V22: Arrondissement Turnhout"
[1] 23
[1] "V23: Arrondissement Verviers"
[1] 24
[1] "V24: Arrondissement Antwerpen"
[1] 25
[1] "V25: Arrondissement Halle-Vilvoorde"
MapperNodes$Nodesize <- as.integer(vertex.mean)
MapperNodes$Nodename <- municipalities
MapperNodes
forceNetwork(Nodes = MapperNodes, Links = MapperLinks,
Source = "Linksource", Target = "Linktarget",
Value = "Linkvalue", NodeID = "Nodegroup",
Group = "Nodegroup", opacity = 1, opacityNoHover = 1,
linkDistance = 75, charge = -15,
Nodesize = "Nodesize")
It looks like Source/Target is not zero-indexed. This is required in JavaScript and so your plot may not render.